# Word count: tally how many times each word occurs in a text file using PySpark.

from pyspark import SparkContext,SparkConf
import os
# Interpreter path exported to PySpark through the PYSPARK_PYTHON environment
# variable. NOTE: this is a machine-specific Windows path; adjust it locally.
_PYSPARK_INTERPRETER = "D:/Soft/Python/Python310/python.exe"
os.environ["PYSPARK_PYTHON"] = _PYSPARK_INTERPRETER

if __name__ == '__main__':
    # Configure a local Spark run ("local[*]" master) with the app name "test".
    conf = SparkConf().setMaster("local[*]").setAppName("test")
    sc = SparkContext(conf=conf)
    try:
        # Load the input file as an RDD with one element per line.
        lines = sc.textFile("data/hello.txt")

        # Split on any run of whitespace rather than a single space, so that
        # repeated spaces, tabs and blank lines do not yield empty-string
        # "words" that would pollute the counts.
        words = lines.flatMap(lambda line: line.split())

        # Pair every word with 1, then sum the ones per distinct word.
        counts = words.map(lambda word: (word, 1)).reduceByKey(lambda a, b: a + b)

        # Bring the (word, count) pairs back to the driver and show them.
        results = counts.collect()
        print(results)
        # Example output:
        # [('itcast', 4), ('python', 6), ('itheima', 7), ('spark', 4), ('pyspark', 3)]
    finally:
        # Always shut the SparkContext down, even if reading or the job fails.
        sc.stop()